import requests
from bs4 import BeautifulSoup
import pandas as pd
url = 'https://e.dangdang.com/list-WY1-dd_sale-0-1.html'

# CSS selector for the first-level category containers in the left nav.
FIRST_LEVEL_SELECTOR = '#nav_left >div> div.first_level.publication.publisher'
# Column order of the output CSV; also guarantees a header row when no
# categories were found.
CSV_COLUMNS = ['first_category', 'second_category', 'third_category', 'url']
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _child_tags(parent):
    """Return the direct child tags of *parent* (empty list if it is None).

    Iterating a BeautifulSoup tag directly also yields the whitespace text
    nodes between elements; ``find_all(True, recursive=False)`` returns only
    the element children, so callers need no exception handling to skip them.
    """
    if parent is None:
        return []
    return parent.find_all(True, recursive=False)


def _collect_third_level_rows(first_level_blocks):
    """Flatten the three-level category navigation into a list of row dicts.

    Args:
        first_level_blocks: iterable of tags, one per first-level category.

    Returns:
        A list of dicts with the keys ``first_category``, ``second_category``,
        ``third_category`` and ``url``. Entries missing an expected heading
        or label are skipped; ``url`` is None when the third-level tag has no
        ``href`` attribute.
    """
    rows = []
    for first_block in first_level_blocks:
        first_heading = first_block.find('h3')
        if first_heading is None:
            continue
        first_category = first_heading.text

        second_list = first_block.find('ul', class_='second_level')
        for second_item in _child_tags(second_list):
            second_heading = second_item.find('h4')
            if second_heading is None:
                continue
            second_category = second_heading.text

            third_list = second_item.find('ul', class_='third_level')
            # NOTE(review): assumes each direct child of ul.third_level is a
            # link-like tag carrying `href` and wrapping an <li> with the
            # category name -- confirm against the live markup.
            for third_item in _child_tags(third_list):
                label = third_item.find('li')
                if label is None:
                    continue
                rows.append({
                    'first_category': first_category,
                    'second_category': second_category,
                    'third_category': label.text,
                    'url': third_item.get('href'),
                })
    return rows


res = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error rather than parsing an error page and
# overwriting the CSV with an empty result.
res.raise_for_status()
# Requests falls back to ISO-8859-1 when the server declares no charset,
# which garbles Chinese text; use the encoding detected from the body instead.
if res.encoding and res.encoding.lower() == 'iso-8859-1':
    res.encoding = res.apparent_encoding
soup = BeautifulSoup(res.text, 'html.parser')
first_level = soup.select(FIRST_LEVEL_SELECTOR)
data = _collect_third_level_rows(first_level)
df = pd.DataFrame(data, columns=CSV_COLUMNS)
# utf_8_sig writes a BOM at the start of the file.
df.to_csv('../file/第三分类标题.csv', index=False, encoding='utf_8_sig')
print('爬取完成')